
% This file run all the  estimations. We only change the database
clear;clc;
diary off
warning off

%s=RandStream('mt19937ar');
%RandStream.setDefaultStream(s);
%reset(s,1001);

% 2011.9.13: This m-file run ecological inference repeatedly for bootstrap.

%Here we load the data to obtain the estimates 
load chile_all_new.mat %Estimates the effect for the whole country%
%load chile_male_new.mat % Estimates the effect for men
%load chile_female_new.mat % Estimates the effect for women
%load chile_1824yo_new.mat% Estimates the effect for voters between 18-24 yo
%load chile_2529yo_new.mat% Estimates the effect for voters between 25-29 yo 
%load chile_3034yo_new.mat% Estimates the effect for voters between 30-34 yo
%load chile_3539yo_new.mat% Estimates the effect for voters between 35-39 yo
%load chile_4044yo_new.mat% Estimates the effect for voters between 40-44 yo
%load chile_4549yo_new.mat% Estimates the effect for voters between 45-49 yo
%load chile_5054yo_new.mat% Estimates the effect for voters between 50-54 yo
%load chile_5559yo_new.mat% Estimates the effect for voters between 55-59 yo
%load chile_6064yo_new.mat% Estimates the effect for voters between 60-64 yo
%load chile_6569yo_new.mat% Estimates the effect for voters between 65-69 yo
%load chile_7074yo_new.mat% Estimates the effect for voters betweeb 70-74 yo
%load chile_75moreyo_new.mat% Estimates the effect by ages
% Two variables are loaded
% xzd_8020_demographics: stateicp, year(0=2004,1=2012), log income, age, education, gender, unemployed, married.
% yxd_8020_turnout: stateicp, year(0=1980,1=2000), republican votes, total voters.


NofResamples=100;

%Raw turnout data
t=yxd_0412_turnout;
t0=t(1:16,3:4);
t1=t(17:32,3:4);
delta=t1(:,1)./t1(:,2)-t0(:,1)./t0(:,2);%Raw comparison
xaxis=1:16;


% In 2 x Numofregions matrix, store regionid, and the last sample index of each region.
xzd_0412_demographics=sortrows(xzd_0412_demographics);


[tmp1,tmp2,tmp3]=unique(xzd_0412_demographics(:,1),'last');
states=[tmp1,tmp2];

%% With original samples...

    % Compute propensity score using probit regression, one with condition
    % on regionid and the other unconditioned(nation-wide).
    initial_propens=propen_score(xzd_0412_demographics);
    
    % Compute ecological inference. 
    initial_outcomes=inference(yxd_0412_turnout, xzd_0412_demographics, initial_propens);

    
    mu0L=initial_outcomes(:,1);
    mu0U=initial_outcomes(:,2);
    mu1L=initial_outcomes(:,3);
    mu1U=initial_outcomes(:,4);
    mu01L=initial_outcomes(:,5);
    mu01U=initial_outcomes(:,6);
    mu10L=initial_outcomes(:,7);
    mu10U=initial_outcomes(:,8);
    Edyp0=initial_outcomes(:,9);
    Edyp=initial_outcomes(:,10);
    
    
     
    display('With Original Samples:');
    
    display('E(Y | (X,D))');
        [Edyp0,Edyp]

    % conditional ATE and TT.
    display('Conditional ATE, Conditional TT');
    display('bounds on ATE for each state')
        [mu1L-mu0U, mu1U - mu0L]
    display('bounds on TT on treated (2000), TT on untreated (1980)')
	    [Edyp-mu01U, Edyp-mu01L, mu10L-Edyp0, mu10U-Edyp0]
    
        
        

    display('Bounds on marginal outcome distn Mu_0L, Mu0U, Mu_1L, Mu1U ')
        [mu0L, mu0U, mu1L, mu1U]

    display('Bounds on counterfactual outcome distn Mu_01L, Mu01U, Mu_10L, Mu10U')
	    [mu01L, mu01U, mu10L, mu10U]
    
    
    
%% Bootstraping...

%outcome variables: Mu_0L, Mu_0U, Mu_1L, Mu_1U, Mu_01L, Mu_01U.
%Store all outcomes for each Resample and for each region + national data.
outcomes=zeros(length(states)+1,10,NofResamples);


for rep=1:NofResamples
    
    display([num2str(rep),'/',num2str(NofResamples)])
    
    % Resampling for each stateicp: We keep the population of each region to
    % be the same for all resamples.
    resample=zeros(length(xzd_0412_demographics),1); 
    
    max_ind=0;
    for st=1:length(states)
    min_ind=max_ind + 1;
        max_ind=states(st,2);
        resample_by_state=randi([min_ind,max_ind], max_ind - min_ind + 1,1);
        resample(min_ind:max_ind)=sort(resample_by_state,1);
    end
    
    resample_demographics=xzd_0412_demographics(resample,:);
    
    % Compute propensity scores.
    resample_propens=propen_score(resample_demographics);
    
    % Compute ecological inference. 
    outcomes(:,:,rep)=inference(yxd_0412_turnout,resample_demographics,resample_propens);    

end

% Compute varian-covariance matrix of lower and upper bounds of ATE and TT.
variance_cov=zeros(6,6,length(states)+1);
shifted_outcomes=shiftdim(outcomes,1);% dimension: variables x repetition x region

for st=1:length(states)+1

    % tmp1=[mu0L, mu0U, mu1L, mu1U, mu01L, mu01U, YforXD(:,1:2)];we need  tmp1=[mu0L, mu0U, mu1L, mu1U, mu01L, mu01U,mu10L,mu10U, YforXD(:,1:2)]
    tmp1=shifted_outcomes(1:10,:,st)';
    % tmp2=[ATE_low, ATE_up, TT_low, TT_up] we need tmp2=[ATE_low, ATE_up, TT_low, TT_up, TTU_low,TTU_up]
    tmp2=[tmp1(:,3)-tmp1(:,2), tmp1(:,4)-tmp1(:,1), tmp1(:,10)-tmp1(:,6), tmp1(:,10)-tmp1(:,5),tmp1(:,8)-tmp1(:,9),tmp1(:,7)-tmp1(:,9)];
    
    variance_cov(:,:,st)=cov(tmp2);
end


%% Confidence interval of ATE and TT by Stoye(2009)

% Confidence Intervals of ATE.
ATE=[mu1L-mu0U, mu1U - mu0L];
CI_ATE=zeros(length(states)+1,2);

for st=1:length(states)+1
    CI_ATE(st,:)=stoye2009(ATE(st,1),ATE(st,2),variance_cov(1,1,st),variance_cov(2,2,st),variance_cov(1,2,st),NofResamples);
end

% Confidence Intervals of TT in year 2012.
TT=[Edyp-mu01U, Edyp-mu01L];
CI_TT=zeros(length(states)+1,2);

for st=1:length(states)+1    
    CI_TT(st,:)=stoye2009(TT(st,1),TT(st,2),variance_cov(3,3,st),variance_cov(4,4,st),variance_cov(3,4,st),NofResamples);
end



%Confidence Intervals of TTU in year 1980.

TTU=[mu10L-Edyp0,mu10U-Edyp0];
CI_TTU=zeros(length(states)+1,2);

for st=1:length(states)+1    
    CI_TTU(st,:)=stoye2009(TTU(st,1),TTU(st,2),variance_cov(5,5,st),variance_cov(6,6,st),variance_cov(5,6,st),NofResamples);
end


display('Confidence Intervals: ATE_Low, ATE_Up, TT_Low, TT_Up,TTU_Low,TT_Up');
[CI_ATE, CI_TT, CI_TTU];




%save eco_chile_all_paper_new
%save eco_chile_male_paper
%save eco_chile_female_paper
